import jsonlines
from tqdm import tqdm
# Accumulator for the merge: maps a per-record key (built from the record id
# and the values of its last two conversation turns) to the record itself.
# It is filled from the merged label file and then updated in place with the
# reward values before being written out.
datas = {}

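# NOTE: the commented-out code below is a disabled, earlier list-based merge
# step. It treats `datas` as a list (`append` / `pop(0)`), whereas `datas` is
# now a dict, so it would not run as written if simply uncommented.
#with jsonlines.open("7M_all_label/7M_label_tagged.jsonl") as f: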
#    for line in tqdm(f):
#        datas.append(line)

#merge_datas = []
#with jsonlines.open("7M_all_label_normalize/infinity-instruct-7M-eng.jsonl") as f:
#    for li, line in tqdm(enumerate(f)):
#        while len(datas) != 0:
#            raw_line = datas.pop(0)
#            if str(raw_line["id"]) + raw_line["conversations"][-1]["value"] + raw_line["conversations"][-2]["value"] != str(line["id"]) + line["conversations"][-1]["value"] + line["conversations"][-2]["value"]:
#                raw_line["label"] = {}
#                merge_datas.append(raw_line)
#                continue
#            else:
#                merge_datas.append(line)
#                break

def _merge_key(record):
    """Return the key used to match a record across the input files.

    The key combines the record id (as a string) with the ``value`` of the
    last and the second-to-last conversation turn.  A tuple is used rather
    than one concatenated string so that the boundaries between the three
    components stay unambiguous (id ``1`` + text ``"2x"`` must not collide
    with id ``12`` + text ``"x"``) and so that the turn texts are referenced
    instead of being copied into a new, large string for every record.

    Raises:
        KeyError: if ``id``, ``conversations`` or a turn's ``value`` is absent.
        IndexError: if the record has fewer than two conversation turns.
    """
    record_id = str(record["id"])
    turns = record["conversations"]
    return (record_id, turns[-1]["value"], turns[-2]["value"])


# Pass 1: index every labelled record by its merge key.  If two records share
# a key, the later one replaces the earlier one (plain dict assignment).
with jsonlines.open("7M_all_label_normalize/infinity-instruct-7M-eng-merge.jsonl") as f:
    for line in tqdm(f):
        datas[_merge_key(line)] = line

# Pass 2: copy the "reward" field of each rewarded record onto the labelled
# record with the same key.  Every rewarded record must have a counterpart
# from pass 1; a missing one aborts the run, as before, but with a message
# that identifies the offending record instead of dumping the whole key.
with jsonlines.open("/share/project/yuyang/data_syn_o1/summary_and_infer/infinity_rewarded_7M.jsonl") as f:
    for record_no, line in enumerate(tqdm(f), start=1):
        reward = line["reward"]
        key = _merge_key(line)
        try:
            target = datas[key]
        except KeyError:
            raise KeyError(
                f"reward record #{record_no} (id={key[0]!r}) has no matching "
                "record in the merged label file"
            ) from None
        target["reward"] = reward

# Pass 3: write the merged records out in the order they were first loaded
# (dicts preserve insertion order).
with jsonlines.open("7M_all_label_normalize/infinity-instruct-7M-eng-reward-merge.jsonl","w") as wf:
    for line in tqdm(datas.values()):
        wf.write(line)
